clear all
set more off
set matsize 10000
set maxvar 10000
adopath + ../code/gslab_tools/

**NDC**
import excel "../raw/FDA_NDC/data/product.xlsx", sheet("product") firstrow clear
rename *, lower
gen drug_name = lower(nonproprietaryname)
replace drug_name = lower(drug_name)
replace drug_name = strtrim(drug_name)
gen keep = 0
foreach drug_name in `drug_names' {
	replace keep = 1 if strpos(drug_name, "`drug_name'")!=0
}

gen first_word = substr(drug_name,1,strpos(drug_name," ")-1)
replace first_word = strtrim(first_word)
replace first_word = drug_name if first_word ==""

gen ndc_drug=""
foreach name in amisulpride amitriptyline aripiprazole bupropion citalopram ////
	clomipramine clozapine duloxetine escitalopram fluoxetine fluvoxamine ///
	haloperidol mirtazapine olanzapine paroxetine quetiapine reboxetine ///
	risperidone sertraline trazodone venlafaxine ziprasidone {
	replace ndc_drug ="`name'" if drug_name=="`name'"
	replace ndc_drug ="`name'" if first_word=="`name'"
}

keep if ndc_drug!=""
keep ndc_drug productndc
rename productndc ndc

replace ndc = subinstr(ndc,"-","",.)
format ndc_drug %50s
bys ndc: keep if _n==1
save ../derived/NDC/ndc.dta, replace


** MEPS**
use ../derived/MEPS/meps_1996_2019.dta, clear

replace rxname=lower(rxname)
gen drug =""
foreach name in amisulpride amitriptyline aripiprazole bupropion citalopram ////
	clomipramine clozapine duloxetine escitalopram fluoxetine fluvoxamine ///
	haloperidol mirtazapine olanzapine paroxetine quetiapine reboxetine ///
	risperidone sertraline trazodone venlafaxine ziprasidone {
	replace drug ="`name'" if strpos(rxname, "`name'")!=0
}
replace drug = "amisulpride" if strpos(rxname, "solian")!=0
replace drug = "amitriptyline" if strpos(rxname, "elavil")!=0
replace drug = "aripiprazole" if strpos(rxname, "abilify")!=0
replace drug = "bupropion" if strpos(rxname, "wellbutrin")!=0
replace drug = "citalopram" if strpos(rxname, "celexa")!=0
replace drug = "clomipramine" if strpos(rxname, "anafranil")!=0
replace drug = "clozapine" if strpos(rxname, "clozaril")!=0
replace drug = "duloxetine" if strpos(rxname, "cymbalta")!=0
replace drug = "escitalopram" if strpos(rxname, "cipralex")!=0
replace drug = "fluoxetine" if strpos(rxname, "prozac")!=0
replace drug = "fluvoxamine" if strpos(rxname, "luvox")!=0
replace drug = "haloperidol" if strpos(rxname, "hadol")!=0
replace drug = "mirtazapine" if strpos(rxname, "mirtazapine")!=0
replace drug = "olanzapine" if strpos(rxname, "zyprexa")!=0
replace drug = "paroxetine" if strpos(rxname, "seoxat")!=0
replace drug = "paroxetine" if strpos(rxname, "paxil")!=0
replace drug = "quetiapine" if strpos(rxname, "seroquel")!=0
replace drug = "reboxetine" if strpos(rxname, "edronax")!=0
replace drug = "risperidone" if strpos(rxname, "risperdal")!=0
replace drug = "sertraline" if strpos(rxname, "zoloft")!=0
replace drug = "trazodone" if strpos(rxname, "molipaxin")!=0
replace drug = "venlafaxine" if strpos(rxname, "effexor")!=0
replace drug = "venlafaxine" if strpos(rxname, "efexor")!=0
replace drug = "ziprasidone" if strpos(rxname, "geodon")!=0

replace rxndc = substr(rxndc,2,8)
rename rxndc ndc
merge m:1 ndc using ../derived/NDC/ndc.dta, keepusing(ndc_drug) assert(1 2 3) keep(1 3) nogen

replace drug = ndc_drug if drug==""
gen n=1
collapse (sum) weight n, by(year drug)

*Approved in Europe in 2009; not yet approved in the USA
gen approve_year = . if drug=="agomelatine"
replace approve_year = 1961 if drug=="amitriptyline"
replace approve_year = 1985 if drug=="bupropion"
replace approve_year = 1998 if drug=="citalopram"
replace approve_year = 1989 if drug=="clomipramine"
replace approve_year = 2007 if drug=="desvenlafaxine"
*Approved for depression in 2004, for anxiety in 2007
replace approve_year = 2004 if drug=="duloxetine"
replace approve_year = 2002 if drug=="escitalopram"
replace approve_year = 1987 if drug=="fluoxetine"
replace approve_year = 1994 if drug=="fluvoxamine"
replace approve_year = 2013 if drug=="levomilnacipran"
*Approved in 2009 but only for fibromyglia
replace approve_year = . if drug=="milnacipran"
replace approve_year = 1994 if drug=="mirtazapine"
replace approve_year = 1994 if drug=="nefazodone"
replace approve_year = 1992 if drug=="paroxetine"
*Approved in Europe in 1997; not yet approved in the USA
replace approve_year = . if drug=="reboxetine"
replace approve_year = 1991 if drug=="sertraline"
replace approve_year = 1981 if drug=="trazodone"
replace approve_year = 1993 if drug=="venlafaxine" | drug=="venlafaxine XR"
replace approve_year = 2011 if drug=="vilazodone"
replace approve_year = 2013 if drug=="vortioxetine"
replace approve_year= . if drug=="placebo"

*Not approved by the FDA in the USA now
replace approve_year = . if drug == "amisulpride"
replace approve_year = 2002 if drug == "aripiprazole"
replace approve_year = 2009 if drug == "asenapine"
replace approve_year = 1989 if drug == "clozapine"
replace approve_year = 1957 if drug == "chlorpromazine"
replace approve_year = 1967 if drug == "haloperidol"
replace approve_year = 2009 if drug == "iloperidone"
replace approve_year = 2010 if drug == "lurasidone"
replace approve_year = 1996 if drug == "olanzapine"
replace approve_year = 2006 if drug == "paliperidone"
replace approve_year = . if drug == "placebo"
replace approve_year = 1997 if drug == "quetiapine"
replace approve_year = 1993 if drug == "risperidone"
*Not approved in US
replace approve_year = . if drug == "sertindole"
replace approve_year = 2001 if drug == "ziprasidone"
*Not approved in US
replace approve_year = . if drug == "zotepine"

gen patent_year = . if drug=="agomelatine"
replace patent_year = 1980 if drug=="amitriptyline"
replace patent_year = 2006 if drug=="bupropion"
replace patent_year = 2003 if drug=="citalopram"
replace patent_year = 1983 if drug=="clomipramine"
replace patent_year = 2022 if drug=="desvenlafaxine"
replace patent_year = 2013 if drug=="duloxetine"
replace patent_year = 2012 if drug=="escitalopram"
replace patent_year = 2001  if drug=="fluoxetine"
replace patent_year = 2000 if drug=="fluvoxamine"
replace patent_year = 2030  if drug=="levomilnacipran"
replace patent_year = . if drug=="milnacipran"
replace patent_year = 2004 if drug=="mirtazapine"
replace patent_year = 2003  if drug=="nefazodone"
replace patent_year = 2003 if drug=="paroxetine"
replace patent_year = . if drug=="reboxetine"
replace patent_year = 2006 if drug=="sertraline"
replace patent_year = 1990  if drug=="trazodone"
replace patent_year = 2008  if drug=="venlafaxine" | drug=="venlafaxine XR"
replace patent_year = 2022  if drug=="vilazodone"
replace patent_year = 2022 if drug=="vortioxetine"
replace patent_year= . if drug=="placebo"

replace patent_year = 2008 if drug == "amisulpride"
replace patent_year = 2014 if drug == "aripiprazole"
replace patent_year = 2020 if drug == "asenapine"
replace patent_year = 1998 if drug == "clozapine"
replace patent_year = 1970 if drug == "chlorpromazine"
replace patent_year = 1986 if drug == "haloperidol"
replace patent_year = 2016 if drug == "iloperidone"
replace patent_year = 2019 if drug == "lurasidone"
replace patent_year = 2011 if drug == "olanzapine"
replace patent_year = 2019 if drug == "paliperidone"
replace patent_year = . if drug == "placebo"
replace patent_year = 2017 if drug == "quetiapine"
replace patent_year = 2003 if drug == "risperidone"
replace patent_year = . if drug == "sertindole"
replace patent_year = 2012 if drug == "ziprasidone"
replace patent_year = . if drug == "zotepine"

drop if year< approve_year & approve_year!=.
gen five_years_post = year >= approve_year & year<=approve_year+4
gen patent_expiry = year >= patent_year-1 & year<=patent_year+1
bys drug (year): gen first_five = _n<=5

bys year: egen total_weight = max(weight)

gen five_year_weight = weight if five_years_post==1
gen five_year_n = n if five_years_post==1
gen five_year_share = weight*100/total_weight if five_years_post==1

gen first_five_weight = weight if first_five==1
gen first_five_n = n if first_five==1
gen first_five_share = weight*100/total_weight if first_five==1

gen patent_expiry_weight = weight if patent_expiry==1
gen patent_expiry_n = n if patent_expiry==1
gen patent_expiry_share = weight*100/total_weight if patent_expiry==1

collapse (mean) five_year_* patent_expiry_* first_five_*, by(drug)
drop if drug==""
save ../derived/MEPS/meps_spending.dta, replace

*Remove this file to save space
cap erase ../derived/MEPS/meps_1996_2019.dta

